import os
import re

import requests
from lxml import etree


# Desktop Chrome/Edge User-Agent string, intended to be sent with requests
# so the site serves the normal browser markup.
Agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36 Edg/138.0.0.0"

# Earlier one-off experiment, kept for reference:
# html = requests.get(url=url,headers={'user-agent':Agent}).text
# element = etree.HTML(html)

# Example of the paginated list URL being scraped:
# https://www.jkl.com.cn/newsList.aspx?current=2&TypeId=10009

# keys:   document titles scraped from the list pages (become file names)
# values: absolute download URLs, kept parallel to `keys` and zipped later
keys = []
values = []

# Walk the paginated news list, collecting document titles into `keys`
# and absolute download URLs into `values`, until the last page is reached.
i = 1
while True:
    # e.g. https://www.jkl.com.cn/newsList.aspx?current=2&TypeId=10009
    url = f"https://www.jkl.com.cn/newsList.aspx?current={i}&TypeId=10009"
    # Fix: the User-Agent defined above was declared but never sent; pass it
    # so the request looks like a normal browser.  Also bound the wait with a
    # timeout so a hung server cannot stall the scraper forever.
    html = requests.get(url, headers={"user-agent": Agent}, timeout=30).text
    element = etree.HTML(html)
    c_url = element.xpath('//div[@class="newsLis"]//li/a/@href')
    c_name = element.xpath('//div[@class="newsLis"]//li/a/text()')
    # Presence of the "尾页" (last-page) link is used as the continue signal.
    end_page = element.xpath('//a[text()="尾页"]/@href')

    # Strip surrounding whitespace from each title before storing it.
    for name in c_name:
        print(name)
        keys.append(name.strip())

    # The scraped hrefs are site-relative; prefix the host to make them absolute.
    for pdf_url in c_url:
        values.append("https://www.jkl.com.cn" + pdf_url)

    # Stop on an empty page as a safety net, or when the "尾页" link is gone.
    # NOTE(review): this assumes the "尾页" link disappears on the final page;
    # if the site renders it on every page this loop would never end on the
    # link check alone — confirm against the live markup.
    if not c_url or not end_page:
        break
    i += 1

# Map title -> download URL.  NOTE: duplicate titles silently overwrite
# earlier entries (dict keys are unique) — acceptable here, but worth knowing.
pdf_dict = dict(zip(keys, values))

# Characters that are illegal in Windows file names; replaced with '_'
# so a scraped title cannot break the save path.
_ILLEGAL_CHARS = re.compile(r'[\\/:*?"<>|]')

# Fix: the target directory was never created, so the first open(..., "wb")
# raised FileNotFoundError on a fresh checkout.
save_dir = "./file/财务资料"
os.makedirs(save_dir, exist_ok=True)

for key, value in pdf_dict.items():
    # File extension taken from the last dot-separated segment of the URL
    # (assumes the URL carries no query string — TODO confirm).
    suffix = value.split('.')[-1]
    # Download the document bytes, sending the browser UA and bounding the wait.
    pdf_data = requests.get(value, headers={"user-agent": Agent}, timeout=30).content
    # Sanitize the title before using it as a file name.
    safe_name = _ILLEGAL_CHARS.sub("_", key)
    pdf_path = f"{save_dir}/{safe_name}.{suffix}"
    with open(pdf_path, "wb") as f:
        f.write(pdf_data)
    print(key, "下载完毕")
